library(ggplot2)
library(lme4)
library(texreg)

## setwd to directory containing data (.csv) file
setwd("/Users/<path>/R_work")


d <- read.csv("combinedData_individual.csv")

d$notmale <- as.numeric(d$gender != "male")
d$notmale[d$gender == "unknown"] <- NA
summary(d$notmale)

#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#   0.000   0.000   0.000   0.417   1.000   1.000    6850 

d$notwhite <- as.numeric(d$ethnicity != "white")
d$notwhite[d$ethnicity == "unknown"] <- NA
summary(d$notwhite)

#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#   0.000   0.000   1.000   0.606   1.000   1.000    6850 

d$continued <- as.numeric(d$outcome == "yes")
d$continued[d$outcome == "unknown"] <- NA
summary(d$continued)

#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#  0.000   0.000   1.000   0.645   1.000   1.000   10867 

d$interest <- as.numeric(d$codingInterest %in% c("more", "low"))
d$interest[d$codingInterest == "unknown"] <- NA
summary(d$interest)

#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's
#   0.000   1.000   1.000   0.826   1.000   1.000    6850 

## create variable for event regions
## (combine event ID (Date) and region ID (Location) to create unique identifiers)
## Note: An "event" refers to all meetings held on a given day, nationwide.
d$eventregion <- as.factor(paste(d$newEventId, d$newRegionId, sep="-"))
d$eventregion.num <- as.numeric(d$eventregion)
table(d$eventregion.num)

# Data Summaries - Frequencies

xtabs(~continued + notmale, data = d)

#               notmale
#   continued    0    1
#           0  615  519
#           1 1210  846

xtabs(~continued + notwhite, data = d)

#              notwhite
#   continued    0    1
#           0  369  765
#           1  764 1292

xtabs(~continued + interest, data = d)

#               interest
#   continued    0    1
#           0  360  774
#           1  419 1637

#*****************************************************************
#
# Logistical regression for H1a (gender and continued engagement)
#
#*****************************************************************

summary(glmer(continued ~ notmale + (1|eventregion), data = d, family = binomial(logit)))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notmale + (1 | eventregion)
# Data: d
#
# AIC      BIC     logLik    deviance   df.resid 
# 4056.1   4074.3  -2025.0   4050.1     3187 
#
# Scaled residuals: 
#    Min      1Q  Median      3Q     Max 
# -2.1750 -1.0750  0.5782  0.7189  1.2171 
#
# Random effects:
#    Groups      Name        Variance Std.Dev.
# eventregion (Intercept)    0.3112   0.5579  
# Number of obs: 3190, groups:  eventregion, 141
#
# Fixed effects:
#              Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  0.82784    0.07612  10.875   <2e-16 ***
#    notmale     -0.17497    0.07764  -2.253   0.0242 *  
#    ---
#    Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Correlation of Fixed Effects:
#         (Intr)
# notmale -0.450

summary(glmer(continued ~ notmale + interest + (1|eventregion), data = d, family = binomial(logit)))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notmale + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4004.1   4028.4  -1998.0   3996.1     3186 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4193 -1.0823  0.5357  0.7296  1.5716 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3337   0.5777  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#              Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  0.34669    0.10028   3.457 0.000546 ***
# notmale     -0.18229    0.07844  -2.324 0.020128 *  
# interest     0.65298    0.08872   7.360 1.84e-13 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#           (Intr) notmal
# notmale  -0.333       
# interest -0.633 -0.019

#*******************************************************************
#
# Logistical regression for H1b (ethnicity and continued engagement)
#
#*******************************************************************

summary(glmer(continued ~ notwhite + (1|eventregion), data = d, family = binomial(logit)))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notwhite + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4056.1   4074.3  -2025.1   4050.1     3187 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.1693 -1.0828  0.5732  0.7236  1.1795 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.31     0.5568  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#              Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  0.86868    0.08603  10.097   <2e-16 ***
# notwhite    -0.18398    0.08232  -2.235   0.0254 *  
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#           (Intr)
# notwhite -0.615

summary(glmer(continued ~ notwhite + interest + (1|eventregion), data = d, family = binomial(logit)))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notwhite + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4004.3   4028.5  -1998.1   3996.3     3186 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.3755 -1.0758  0.5338  0.7284  1.5199 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.333    0.577   
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  0.38903    0.10815   3.597 0.000322 ***
# notwhite    -0.18958    0.08316  -2.280 0.022632 *  
# interest     0.65195    0.08868   7.352 1.95e-13 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#           (Intr) notwht
# notwhite -0.487       
# interest -0.585 -0.014

#*******************************************************************
#
# combination models 
#
#*******************************************************************

model.h1.combo <- glmer(continued ~ notmale + notwhite + interest + (1|eventregion), data = d, family = binomial(logit))
summary(model.h1.combo)

##OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notmale + notwhite + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4000.6   4031.0  -1995.3   3990.6     3185 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4640 -1.0660  0.5408  0.7295  1.5985 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3327   0.5768  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  0.47057    0.11361   4.142 3.44e-05 ***
# notmale     -0.18649    0.07852  -2.375   0.0175 *  
# notwhite    -0.19412    0.08327  -2.331   0.0197 *  
# interest     0.65486    0.08879   7.376 1.63e-13 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#          (Intr) notmal notwht
# notmale  -0.305              
# notwhite -0.471  0.025       
# interest -0.551 -0.020 -0.015

model.h1.gen <- glmer(continued ~ notmale + interest + (1|eventregion), data = d, family = binomial(logit))
summary(model.h1.gen)

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notmale + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4004.1   4028.4  -1998.0   3996.1     3186 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4193 -1.0823  0.5357  0.7296  1.5716 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3337   0.5777  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  0.34669    0.10028   3.457 0.000546 ***
# notmale     -0.18229    0.07844  -2.324 0.020128 *  
# interest     0.65298    0.08872   7.360 1.84e-13 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#          (Intr) notmal
# notmale  -0.333       
# interest -0.633 -0.019

model.h1.eth <- glmer(continued ~ notwhite + interest + (1|eventregion), data = d, family = binomial(logit))
summary(model.h1.eth)

##OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notwhite + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4004.3   4028.5  -1998.1   3996.3     3186 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.3755 -1.0758  0.5338  0.7284  1.5199 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.333    0.577   
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  0.38903    0.10815   3.597 0.000322 ***
# notwhite    -0.18958    0.08316  -2.280 0.022632 *  
# interest     0.65195    0.08868   7.352 1.95e-13 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#           (Intr) notwht
# notwhite -0.487       
# interest -0.585 -0.014

#---------------------------------------------------------------
#
# Summary for H1 (underrepresentation and continued engagement)
#
# In all logistical regression models used for H1, underrepresentation
# for either gender or ethnicity alone did not have a significant 
# association with the outcome of continued engagement.
# (Although the results did agree with our predictions of lower future
# engagement if a member of an underrepresented group.)
#
# However, for both H1a (Gender) and H1b (Ethnicity), when a control
# for prior level of coding interest was added, this variable WAS found
# to have a significant association with the outcome -->
#
# Having even a low interest in coding prior to attending an event 
# is associated with a greater likelihood of continuing to engage in coding 
# after the event (compared to having no prior interest in coding.)
#
#---------------------------------------------------------------



#*******************************************************************
#
# Logistical regression for H2 
# (interaction of gender and ethnicity vs. continued engagement)
#
#*******************************************************************

summary(glmer(continued ~ gender_pct + ethnicity_pct + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ gender_pct + ethnicity_pct + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4051.5   4075.8  -2021.8   4043.5     3186 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.3682 -1.0666  0.5674  0.7203  1.2843 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3265   0.5714  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)   
# (Intercept)     0.3348     0.1672   2.002  0.04524 * 
# gender_pct      0.4520     0.4828   0.936  0.34917   
# ethnicity_pct   1.9235     0.5977   3.218  0.00129 **
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) gndr_p
# gender_pct  -0.730       
# ethncty_pct -0.514 -0.043

summary(glmer(continued ~ gender_pct + ethnicity_pct + interest + (1|eventregion), data = d, family=binomial("logit")))

##OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ gender_pct + ethnicity_pct + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3999.4   4029.8  -1994.7   3989.4     3185 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.6085 -1.0879  0.5296  0.7331  1.5906 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3512   0.5926  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)    -0.1832     0.1837  -0.997  0.31856    
# gender_pct      0.5669     0.4890   1.159  0.24628    
# ethnicity_pct   1.9380     0.6049   3.204  0.00136 ** 
# interest        0.6549     0.0889   7.366 1.75e-13 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) gndr_p ethnc_
# gender_pct  -0.687              
# ethncty_pct -0.478 -0.042       
# interest    -0.383  0.039  0.012


## add interaction terms for gender_pct * ethnicity_pct (collinear problems)
summary(glmer(continued ~ gender_pct + ethnicity_pct + gender_pct * ethnicity_pct + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ gender_pct + ethnicity_pct + gender_pct * ethnicity_pct +      (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4053.5   4083.8  -2021.8   4043.5     3185 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.3867 -1.0661  0.5687  0.7220  1.2840 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3256   0.5706  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#                          Estimate Std. Error z value Pr(>|z|)
# (Intercept)                0.3742     0.3157   1.185    0.236
# gender_pct                 0.3060     1.1048   0.277    0.782
# ethnicity_pct              1.6712     1.8167   0.920    0.358
# gender_pct:ethnicity_pct   0.9081     6.1759   0.147    0.883
# 
# Correlation of Fixed Effects:
#             (Intr) gndr_p ethnc_
# gender_pct  -0.932              
# ethncty_pct -0.891  0.843       
# gndr_pct:t_  0.848 -0.900 -0.944

summary(glmer(continued ~ gender_pct + ethnicity_pct + gender_pct * ethnicity_pct + interest + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ gender_pct + ethnicity_pct + gender_pct * ethnicity_pct +  
#     interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4001.4   4037.8  -1994.7   3989.4     3184 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.6118 -1.0877  0.5296  0.7333  1.5908 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3511   0.5925  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#                          Estimate Std. Error z value Pr(>|z|)    
# (Intercept)              -0.17662    0.32466  -0.544    0.586    
# gender_pct                0.54259    1.10268   0.492    0.623    
# ethnicity_pct             1.89620    1.81092   1.047    0.295    
# interest                  0.65486    0.08892   7.365 1.78e-13 ***
# gender_pct:ethnicity_pct  0.15003    6.13773   0.024    0.980    
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) gndr_p ethnc_ intrst
# gender_pct  -0.912                     
# ethncty_pct -0.868  0.839              
# interest    -0.232  0.034  0.022       
# gndr_pct:t_  0.825 -0.896 -0.943 -0.019

model.h2.gen.combo <- glmer(continued ~ gender_pct + interest + (1|eventregion), data = d, family=binomial("logit"))
summary(model.h2.gen.combo)

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ gender_pct + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4007.7   4032.0  -1999.9   3999.7     3186 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.3969 -1.0855  0.5357  0.7374  1.6537 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3401   0.5832  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  0.09774    0.16049   0.609    0.543    
# gender_pct   0.64281    0.48530   1.325    0.185    
# interest     0.65434    0.08874   7.374 1.65e-13 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#            (Intr) gndr_p
# gender_pct -0.807       
# interest   -0.430  0.038

## let's look at gender_pct within male and notmale
model.h2.gen.notmale <- glmer(continued ~ gender_pct + interest + (1|eventregion), data = d[as.logical(d$notmale),], family=binomial("logit"))
summary(model.h2.gen.notmale)

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ gender_pct + interest + (1 | eventregion)
# Data: d[as.logical(d$notmale), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 1749.6   1770.4   -870.8   1741.6     1361 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.2922 -1.0277  0.5544  0.7339  1.7666 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.4163   0.6452  
# Number of obs: 1365, groups:  eventregion, 134
# 
# Fixed effects:
#             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)   0.2814     0.2476   1.137    0.256    
# gender_pct   -1.0389     1.0151  -1.023    0.306    
# interest      0.7389     0.1377   5.364 8.13e-08 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#            (Intr) gndr_p
# gender_pct -0.837       
# interest   -0.442  0.041
    
model.h2.gen.male <- glmer(continued ~ gender_pct + interest + (1|eventregion), data = d[!as.logical(d$notmale),], family=binomial("logit"))
summary(model.h2.gen.male)

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ gender_pct + interest + (1 | eventregion)
# Data: d[!as.logical(d$notmale), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 2280.7   2302.7  -1136.3   2272.7     1821 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4384 -1.1553  0.5534  0.6977  1.3254 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.2998   0.5476  
# Number of obs: 1825, groups:  eventregion, 141
# 
# Fixed effects:
#             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)   0.4657     0.3347   1.391    0.164    
# gender_pct   -0.2853     0.9969  -0.286    0.775    
# interest      0.5769     0.1180   4.890 1.01e-06 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#            (Intr) gndr_p
# gender_pct -0.939       
# interest   -0.265  0.013

## do the whole thing in a  single model
summary(glmer(continued ~ notmale * gender_pct + interest + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notmale * gender_pct + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4007.8   4044.2  -1997.9   3995.8     3184 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4129 -1.0778  0.5385  0.7306  1.5647 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3332   0.5773  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#                    Estimate Std. Error z value Pr(>|z|)    
# (Intercept)         0.49757    0.31020   1.604    0.109    
# notmale            -0.31806    0.34639  -0.918    0.359    
# gender_pct         -0.47447    0.92153  -0.515    0.607    
# interest            0.65230    0.08878   7.347 2.02e-13 ***
# notmale:gender_pct  0.39401    1.23447   0.319    0.750    
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) notmal gndr_p intrst
# notmale     -0.794                     
# gender_pct  -0.946  0.809              
# interest    -0.213 -0.016  0.010       
# ntml:gndr_p  0.652 -0.949 -0.683  0.020

## look at the same thing with but based on ethnicity
## lets look at gender_pct within male and notmale
summary(glmer(continued ~ ethnicity_pct + interest + (1|eventregion), data = d[as.logical(d$notmale),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ ethnicity_pct + interest + (1 | eventregion)
# Data: d[as.logical(d$notmale), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 1746.8   1767.7   -869.4   1738.8     1361 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4801 -1.0312  0.5521  0.7278  1.6978 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.4121   0.6419  
# Number of obs: 1365, groups:  eventregion, 134
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)    -0.2122     0.1976  -1.074   0.2829    
# ethnicity_pct   1.7687     0.9081   1.948   0.0515 .  
# interest        0.7481     0.1377   5.434 5.52e-08 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) ethnc_
# ethncty_pct -0.729       
# interest    -0.525  0.020

summary(glmer(continued ~ ethnicity_pct + interest + (1|eventregion), data = d[!as.logical(d$notmale),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ ethnicity_pct + interest + (1 | eventregion)
# Data: d[!as.logical(d$notmale), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 2275.4   2297.4  -1133.7   2267.4     1821 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.3891 -1.1356  0.5489  0.6977  1.3447 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3142   0.5606  
# Number of obs: 1825, groups:  eventregion, 141
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)     0.0990     0.1661   0.596   0.5512    
# ethnicity_pct   1.8202     0.7868   2.314   0.0207 *  
#     interest        0.5763     0.1183   4.872  1.1e-06 ***
#     ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) ethnc_
# ethncty_pct -0.715       
# interest    -0.515  0.005

#######################################


## similar analysis stratified but by ethnicity
model.h2.eth.combo <- glmer(continued ~ ethnicity_pct + interest + (1|eventregion), data = d, family=binomial("logit"))
summary(model.h2.eth.combo)

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ ethnicity_pct + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3998.8   4023.0  -1995.4   3990.8     3186 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.5188 -1.0935  0.5299  0.7278  1.5905 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3438   0.5863  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)   -0.03691    0.13307  -0.277  0.78147    
# ethnicity_pct  1.96990    0.60311   3.266  0.00109 ** 
# interest       0.65130    0.08878   7.336  2.2e-13 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) ethnc_
# ethncty_pct -0.700       
# interest    -0.492  0.014

model.h2.eth.notwhite <- glmer(continued ~ ethnicity_pct + interest + (1|eventregion), data = d[as.logical(d$notwhite),], family=binomial("logit"))
summary(model.h2.eth.notwhite)

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ ethnicity_pct + interest + (1 | eventregion)
# Data: d[as.logical(d$notwhite), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 2617.7   2640.2  -1304.8   2609.7     2053 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.1950 -1.0443  0.5343  0.7415  1.5507 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3949   0.6284  
# Number of obs: 2057, groups:  eventregion, 138
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)   -0.00744    0.15774  -0.047   0.9624    
# ethnicity_pct  1.70537    0.86978   1.961   0.0499 *  
# interest       0.64696    0.11102   5.827 5.63e-09 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) ethnc_
# ethncty_pct -0.693       
# interest    -0.520  0.022

model.h2.eth.white <- glmer(continued ~ ethnicity_pct + interest + (1|eventregion), data = d[!as.logical(d$notwhite),], family=binomial("logit"))
summary(model.h2.eth.white)

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ ethnicity_pct + interest + (1 | eventregion)
# Data: d[!as.logical(d$notwhite), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 1404.3   1424.4   -698.1   1396.3     1129 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.0441 -1.1583  0.5506  0.6747  1.4662 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.264    0.5139  
# Number of obs: 1133, groups:  eventregion, 137
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)   -0.01788    0.30700  -0.058    0.954    
# ethnicity_pct  1.78067    1.36983   1.300    0.194    
# interest       0.63681    0.15067   4.227 2.37e-05 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) ethnc_
# ethncty_pct -0.888       
# interest    -0.334 -0.026

## colinearity
summary(glmer(continued ~ notwhite * ethnicity_pct + interest + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notwhite * ethnicity_pct + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 4002.4   4038.8  -1995.2   3990.4     3184 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.5106 -1.0871  0.5274  0.7291  1.5240 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3425   0.5852  
# Number of obs: 3190, groups:  eventregion, 141
# 
# Fixed effects:
#                        Estimate Std. Error z value Pr(>|z|)    
# (Intercept)             0.08035    0.28033   0.287    0.774    
# notwhite               -0.11435    0.28495  -0.401    0.688    
# ethnicity_pct           1.51829    1.26828   1.197    0.231    
# interest                0.65214    0.08881   7.343 2.09e-13 ***
# notwhite:ethnicity_pct  0.31544    1.48191   0.213    0.831    
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) notwht ethnc_ intrst
# notwhite    -0.867                     
# ethncty_pct -0.922  0.880              
# interest    -0.211 -0.026 -0.017       
# ntwht:thnc_  0.760 -0.937 -0.822  0.025

#---------------------------------------------------------------
#
# Summary for H2 
# (interaction of gender & ethinicity and continued engagement)
#
# The results from this R analysis seem to indicate that while being
# non-male or non-white are both **individually** associated with 
# lower likelihood of continuing with coding, if a participant is BOTH
# non-male AND non-white, they are MORE likely to continue engaging.
# However, all the above effects fails to reach the level of statistical
# significance.
#
# Again, when the control for prior level of coding interest (low vs no)
# is added, this variable is found to be strongly significant, with even
# low-expressed prior interest being associated with a greater likelihood 
# of continued engagement over expressing no prior interest in coding.
#---------------------------------------------------------------

#*******************************************************************
#
# Logistical regression for H3 
# (presentation vs. continued engagement)
#
#*******************************************************************

# Make new "presented" variable where 1 = yes and 0 = no
d$presented.new <- as.numeric(d$presented == "yes")
d$presented.new[d$presented == "unknown"] <- NA

# Frequencies for Presented
table(d$presented.new)

# 0     1 
# 723 12729 

# Frequencies for Presented - For Students with Complete Data
# This looks only at students who provided all voluntary data and returned all surveys.
table(d$presented.new[complete.cases(d)])

# 0    1 
# 150 2957

## version w/o random effects
summary(glm(continued ~ presented.new, data = d, family=binomial("logit")))

## OUTPUT:
# Call:
#     glm(formula = continued ~ presented.new, family = binomial("logit"), 
#         data = d)
# 
# Deviance Residuals: 
#     Min       1Q   Median       3Q      Max  
# -1.4563  -1.4563   0.9221   0.9221   1.0879  
# 
# Coefficients:
#               Estimate Std. Error z value Pr(>|z|)  
# (Intercept)     0.2141     0.1642   1.304   0.1923  
# presented.new   0.4212     0.1687   2.496   0.0125 *
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# (Dispersion parameter for binomial family taken to be 1)
# 
# Null deviance: 4027.6  on 3106  degrees of freedom
# Residual deviance: 4021.5  on 3105  degrees of freedom
# (10950 observations deleted due to missingness)
# AIC: 4025.5
# 
# Number of Fisher Scoring iterations: 4

## WITH RANDOM EFFECTS (from specific event)

## presented
summary(glmer(continued ~ presented.new + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3934.2   3952.4  -1964.1   3928.2     3104 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.1861 -1.0882  0.5642  0.6945  1.2066 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3095   0.5563  
# Number of obs: 3107, groups:  eventregion, 136
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)
# (Intercept)     0.3657     0.3369   1.085    0.278
# presented.new   0.4370     0.3435   1.272    0.203
# 
# Correlation of Fixed Effects:
#             (Intr)
# presentd.nw -0.979

## presented & interest
summary(glmer(continued ~ presented.new + interest + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3877.7   3901.9  -1934.8   3869.7     3103 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4603 -1.1070  0.5309  0.7326  1.5781 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3302   0.5746  
# Number of obs: 3107, groups:  eventregion, 136
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)    -0.1810     0.3532  -0.512    0.608    
# presented.new   0.4789     0.3528   1.357    0.175    
# interest        0.6875     0.0897   7.664  1.8e-14 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) prsnt.
# presentd.nw -0.963       
# interest    -0.201  0.020

## look at gender
summary(glmer(continued ~ notmale + presented.new + notmale * presented.new + interest + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notmale + presented.new + notmale * presented.new +      interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3871.6   3907.8  -1929.8   3859.6     3101 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4344 -1.0863  0.5280  0.7299  2.0351 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.328    0.5728  
# Number of obs: 3107, groups:  eventregion, 136
# 
# Fixed effects:
#                       Estimate Std. Error z value Pr(>|z|)    
# (Intercept)            0.21533    0.38722   0.556   0.5781    
# notmale               -0.93583    0.35721  -2.620   0.0088 ** 
# presented.new          0.14315    0.38878   0.368   0.7127    
# interest               0.68966    0.08986   7.675 1.66e-14 ***
# notmale:presented.new  0.79047    0.36649   2.157   0.0310 *  
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) notmal prsnt. intrst
# notmale     -0.400                     
# presentd.nw -0.965  0.400              
# interest    -0.180 -0.009  0.016       
# ntml:prsnt.  0.390 -0.975 -0.410  0.005

model.h3.gender <- glmer(continued ~ notmale + presented.new + notmale * presented.new + gender_pct + interest + (1|eventregion), data = d, family=binomial("logit"))
summary(model.h3.gender)

## OUTPUT
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notmale + presented.new + notmale * presented.new +  
#     gender_pct + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3873.2   3915.4  -1929.6   3859.2     3100 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4338 -1.0792  0.5319  0.7246  2.0460 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3246   0.5697  
# Number of obs: 3107, groups:  eventregion, 136
# 
# Fixed effects:
#                       Estimate Std. Error z value Pr(>|z|)    
# (Intercept)            0.35818    0.44270   0.809  0.41848    
# notmale               -0.98655    0.36575  -2.697  0.00699 ** 
# presented.new          0.14417    0.38789   0.372  0.71014    
# gender_pct            -0.45450    0.68758  -0.661  0.50861    
# interest               0.68775    0.08989   7.651 1.99e-14 ***
# notmale:presented.new  0.78942    0.36687   2.152  0.03142 *  
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) notmal prsnt. gndr_p intrst
# notmale     -0.445                            
# presentd.nw -0.840  0.392                     
# gender_pct  -0.488  0.210 -0.005              
# interest    -0.172 -0.003  0.016  0.029       
# ntml:prsnt.  0.340 -0.952 -0.412  0.004  0.005

## look at ethnicity
summary(glmer(continued ~ notwhite + presented.new + notwhite * presented.new + interest + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notwhite + presented.new + notwhite * presented.new +  
#     interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3873.5   3909.7  -1930.7   3861.5     3101 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.5039 -1.0851  0.5287  0.7295  1.6762 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3264   0.5713  
# Number of obs: 3107, groups:  eventregion, 136
# 
# Fixed effects:
#                        Estimate Std. Error z value Pr(>|z|)    
# (Intercept)              0.5154     0.5175   0.996   0.3193    
# notwhite                -0.9024     0.4743  -1.903   0.0571 .  
# presented.new           -0.1045     0.5201  -0.201   0.8407    
# interest                 0.6875     0.0898   7.656 1.92e-14 ***
# notwhite:presented.new   0.7229     0.4821   1.500   0.1337    
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) notwht prsnt. intrst
# notwhite    -0.730                     
# presentd.nw -0.977  0.726              
# interest    -0.144  0.010  0.022       
# ntwht:prsn.  0.719 -0.984 -0.733 -0.013

model.h3.eth  <- glmer(continued ~ notwhite + presented.new + notwhite * presented.new + ethnicity_pct + interest + (1|eventregion), data = d, family=binomial("logit"))
summary(model.h3.eth)

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notwhite + presented.new + notwhite * presented.new +  
#     ethnicity_pct + interest + (1 | eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3868.5   3910.8  -1927.2   3854.5     3100 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.6147 -1.0847  0.5212  0.7280  1.6200 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3353   0.5791  
# Number of obs: 3107, groups:  eventregion, 136
# 
# Fixed effects:
#                        Estimate Std. Error z value Pr(>|z|)    
# (Intercept)             0.24845    0.53109   0.468  0.63993    
# notwhite               -0.89429    0.47512  -1.882  0.05980 .  
# presented.new          -0.23869    0.52644  -0.453  0.65025    
# ethnicity_pct           1.95893    0.73886   2.651  0.00802 ** 
# interest                0.68740    0.08992   7.645 2.09e-14 ***
# notwhite:presented.new  0.86829    0.48604   1.786  0.07403 .  
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) notwht prsnt. ethnc_ intrst
# notwhite    -0.716                            
# presentd.nw -0.937  0.720                     
# ethncty_pct -0.190  0.006 -0.095              
# interest    -0.142  0.010  0.021  0.008       
# ntwht:prsn.  0.678 -0.977 -0.734  0.113 -0.012
# optimizer (Nelder_Mead) convergence code: 0 (OK)
# Model failed to converge with max|grad| = 0.00220802 (tol = 0.002, component 1)

## REPEATING WITH DIFFERENT OPTIMIZER SPECIFICATION:
model.h3.eth  <- glmer(continued ~ notwhite + presented.new + notwhite * presented.new + ethnicity_pct + interest + (1|eventregion), data = d, family=binomial("logit"), control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))
summary(model.h3.eth)

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notwhite + presented.new + notwhite * presented.new +  
#     ethnicity_pct + interest + (1 | eventregion)
# Data: d
# Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3868.5   3910.8  -1927.2   3854.5     3100 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.6148 -1.0848  0.5212  0.7280  1.6200 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3353   0.579   
# Number of obs: 3107, groups:  eventregion, 136
# 
# Fixed effects:
#                        Estimate Std. Error z value Pr(>|z|)    
# (Intercept)             0.24850    0.53081   0.468   0.6397    
# notwhite               -0.89451    0.47482  -1.884   0.0596 .  
# presented.new          -0.23874    0.52616  -0.454   0.6500    
# ethnicity_pct           1.95877    0.73857   2.652   0.0080 ** 
# interest                0.68742    0.08992   7.645 2.09e-14 ***
# notwhite:presented.new  0.86849    0.48574   1.788   0.0738 .  
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) notwht prsnt. ethnc_ intrst
# notwhite    -0.716                            
# presentd.nw -0.937  0.720                     
# ethncty_pct -0.190  0.006 -0.095              
# interest    -0.142  0.009  0.021  0.008       
# ntwht:prsn.  0.678 -0.977 -0.734  0.113 -0.012

## adding ethnicity_pct adds very little
summary(glmer(continued ~ notwhite + presented.new + notwhite * presented.new + ethnicity_pct + notwhite * ethnicity_pct + interest + (1|eventregion), data = d, family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ notwhite + presented.new + notwhite * presented.new +  
#     ethnicity_pct + notwhite * ethnicity_pct + interest + (1 |      eventregion)
# Data: d
# 
# AIC      BIC      logLik   deviance   df.resid 
# 3870.3   3918.6  -1927.1   3854.3     3099 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.5873 -1.0829  0.5209  0.7251  1.6174 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3357   0.5794  
# Number of obs: 3107, groups:  eventregion, 136
# 
# Fixed effects:
#                        Estimate Std. Error z value Pr(>|z|)    
# (Intercept)             0.30793    0.55032   0.560   0.5758    
# notwhite               -0.97621    0.51493  -1.896   0.0580 .  
# presented.new          -0.20651    0.53200  -0.388   0.6979    
# ethnicity_pct           1.51188    1.31488   1.150   0.2502    
# interest                0.68860    0.08997   7.653 1.96e-14 ***
# notwhite:presented.new  0.83596    0.49230   1.698   0.0895 .  
# notwhite:ethnicity_pct  0.62946    1.53116   0.411   0.6810    
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) notwht prsnt. ethnc_ intrst ntwh:.
# notwhite    -0.739                                   
# presentd.nw -0.855  0.600                            
# ethncty_pct -0.321  0.323 -0.175                     
# interest    -0.128 -0.004  0.026 -0.023              
# ntwht:prsn.  0.603 -0.827 -0.740  0.196 -0.017       
# ntwht:thnc_  0.263 -0.386  0.147 -0.827  0.033 -0.161

## stratified version of the two models for gender
summary(glmer(continued ~ presented.new + gender_pct + interest + (1|eventregion), data = d[as.logical(d$notmale),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + gender_pct + interest + (1 | eventregion)
# Data: d[as.logical(d$notmale), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 1685.4   1711.4   -837.7   1675.4     1319 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.2900 -1.0440  0.5504  0.7234  2.0157 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3747   0.6121  
# Number of obs: 1324, groups:  eventregion, 129
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)    -0.6302     0.4893  -1.288   0.1978    
# presented.new   0.9639     0.4318   2.232   0.0256 *  
# gender_pct     -1.0623     1.0300  -1.031   0.3024    
# interest        0.7595     0.1391   5.458 4.81e-08 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) prsnt. gndr_p
# presentd.nw -0.860              
# gender_pct  -0.449  0.023       
# interest    -0.247  0.028  0.041

summary(glmer(continued ~ presented.new + gender_pct + interest + (1|eventregion), data = d[!as.logical(d$notmale),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + gender_pct + interest + (1 | eventregion)
# Data: d[!as.logical(d$notmale), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 2212.4   2239.8  -1101.2   2202.4     1778 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.5042 -1.1355  0.5407  0.6871  1.3525 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3128   0.5593  
# Number of obs: 1783, groups:  eventregion, 136
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)     0.4616     0.5019   0.920    0.358    
# presented.new   0.1359     0.3872   0.351    0.726    
# gender_pct     -0.6801     1.0181  -0.668    0.504    
# interest        0.6286     0.1196   5.256 1.47e-07 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) prsnt. gndr_p
# presentd.nw -0.730              
# gender_pct  -0.628 -0.019       
# interest    -0.189  0.022  0.006


## interaction terms

summary(glmer(continued ~ presented.new + gender_pct + presented.new * gender_pct +  interest + (1|eventregion), data = d[as.logical(d$notmale),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + gender_pct + presented.new * gender_pct +      interest + (1 | eventregion)
# Data: d[as.logical(d$notmale), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 1685.4   1716.5   -836.7   1673.4     1318 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.2718 -1.0478  0.5477  0.7233  2.3082 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3616   0.6013  
# Number of obs: 1324, groups:  eventregion, 129
# 
# Fixed effects:
#                           Estimate Std. Error z value Pr(>|z|)    
# (Intercept)                0.7230     1.0939   0.661    0.509    
# presented.new             -0.4621     1.1136  -0.415    0.678    
# gender_pct                -7.5705     4.7994  -1.577    0.115    
# interest                   0.7622     0.1391   5.478  4.3e-08 ***
# presented.new:gender_pct   6.8507     4.9138   1.394    0.163    
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) prsnt. gndr_p intrst
# presentd.nw -0.974                     
# gender_pct  -0.917  0.903              
# interest    -0.091 -0.009 -0.012       
# prsntd.nw:_  0.895 -0.922 -0.977  0.022

summary(glmer(continued ~ presented.new + gender_pct + presented.new * gender_pct + interest + (1|eventregion), data = d[!as.logical(d$notmale),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + gender_pct + presented.new * gender_pct +      interest + (1 | eventregion)
# Data: d[!as.logical(d$notmale), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 2210.5   2243.4  -1099.3   2198.5     1777 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -3.1505 -1.1234  0.5444  0.6872  1.3929 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.289    0.5375  
# Number of obs: 1783, groups:  eventregion, 136
# 
# Fixed effects:
#                          Estimate Std. Error z value Pr(>|z|)    
# (Intercept)                4.4174     2.2292   1.982   0.0475 *  
# presented.new             -3.9220     2.2528  -1.741   0.0817 .  
# gender_pct               -13.3168     6.9369  -1.920   0.0549 .  
# interest                   0.6239     0.1195   5.223 1.76e-07 ***
# presented.new:gender_pct  12.9490     7.0173   1.845   0.0650 .  
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) prsnt. gndr_p intrst
# presentd.nw -0.988                     
# gender_pct  -0.984  0.974              
# interest    -0.045  0.006  0.003       
# prsntd.nw:_  0.974 -0.985 -0.990 -0.002

## stratified version of the two models for ethnicity
summary(glmer(continued ~  presented.new + ethnicity_pct + interest + (1|eventregion), data = d[as.logical(d$notwhite),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + ethnicity_pct + interest + (1 | eventregion)
# Data: d[as.logical(d$notwhite), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 2546.9   2574.9  -1268.4   2536.9     2004 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.2015 -1.0569  0.5337  0.7329  1.5888 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3571   0.5976  
# Number of obs: 2009, groups:  eventregion, 134
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)    -0.6389     0.4007  -1.595   0.1108    
# presented.new   0.6321     0.3822   1.654   0.0982 .  
# ethnicity_pct   1.9558     0.8815   2.219   0.0265 *  
# interest        0.6678     0.1120   5.962  2.5e-09 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) prsnt. ethnc_
# presentd.nw -0.919              
# ethncty_pct -0.285  0.011       
# interest    -0.226  0.020  0.029

summary(glmer(continued ~ presented.new + ethnicity_pct + interest + (1|eventregion), data = d[!as.logical(d$notwhite),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + ethnicity_pct + interest + (1 | eventregion)
# Data: d[!as.logical(d$notwhite), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 1348.5   1373.5   -669.2   1338.5     1093 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.0894 -1.1033  0.5412  0.6602  1.4601 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.2463   0.4963  
# Number of obs: 1098, groups:  eventregion, 132
# 
# Fixed effects:
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept)     0.2903     0.5387   0.539    0.590    
# presented.new  -0.3076     0.5052  -0.609    0.543    
# ethnicity_pct   1.7052     1.3956   1.222    0.222    
# interest        0.6954     0.1524   4.563 5.04e-06 ***
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) prsnt. ethnc_
# presentd.nw -0.822              
# ethncty_pct -0.348 -0.181       
# interest    -0.224  0.043 -0.035

## add interaction
summary(glmer(continued ~  presented.new + ethnicity_pct + interest + presented.new * ethnicity_pct + (1|eventregion), data = d[as.logical(d$notwhite),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + ethnicity_pct + interest + presented.new *      ethnicity_pct + (1 | eventregion)
# Data: d[as.logical(d$notwhite), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 2548.8   2582.4  -1268.4   2536.8     2003 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.2136 -1.0577  0.5324  0.7338  1.6173 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.3573   0.5977  
# Number of obs: 2009, groups:  eventregion, 134
# 
# Fixed effects:
#                             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)                  -0.4851     0.5779  -0.839    0.401    
# presented.new                 0.4685     0.5847   0.801    0.423    
# ethnicity_pct                 0.7691     3.3255   0.231    0.817    
# interest                      0.6659     0.1121   5.938 2.89e-09 ***
# presented.new:ethnicity_pct   1.2760     3.4471   0.370    0.711    
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) prsnt. ethnc_ intrst
# presentd.nw -0.962                     
# ethncty_pct -0.747  0.732              
# interest    -0.189  0.047  0.051       
# prsntd.nw:_  0.721 -0.757 -0.964 -0.045

summary(glmer(continued ~ presented.new + ethnicity_pct + interest  + presented.new * ethnicity_pct + (1|eventregion), data = d[!as.logical(d$notwhite),], family=binomial("logit")))

## OUTPUT:
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: binomial  ( logit )
# Formula: continued ~ presented.new + ethnicity_pct + interest + presented.new *      ethnicity_pct + (1 | eventregion)
# Data: d[!as.logical(d$notwhite), ]
# 
# AIC      BIC      logLik   deviance   df.resid 
# 1349.9   1379.9   -669.0   1337.9     1092 
# 
# Scaled residuals: 
#     Min      1Q  Median      3Q     Max 
# -2.4431 -1.1052  0.5449  0.6593  1.4576 
# 
# Random effects:
#     Groups      Name        Variance Std.Dev.
# eventregion (Intercept)     0.2431   0.493   
# Number of obs: 1098, groups:  eventregion, 132
# 
# Fixed effects:
#                             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)                  -0.7773     1.5179  -0.512    0.609    
# presented.new                 0.7889     1.5425   0.511    0.609    
# ethnicity_pct                 9.5392    10.6637   0.895    0.371    
# interest                      0.6961     0.1524   4.569  4.9e-06 ***
# presented.new:ethnicity_pct  -7.9823    10.7573  -0.742    0.458    
# ---
#     Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Correlation of Fixed Effects:
#             (Intr) prsnt. ethnc_ intrst
# presentd.nw -0.979                     
# ethncty_pct -0.942  0.927              
# interest    -0.089  0.023  0.005       
# prsntd.nw:_  0.934 -0.943 -0.991 -0.010

#---------------------------------------------------------------
#
# Summary for H3 
# (presentation and continued engagement)
#
# This analysis supports our prediction that presenting is associated
# with increased likelihood of continued engagement. Even when the control
# for prior level of interest in coding is added, presenting still has 
# a significant positive effect on continued engagement.
#
#---------------------------------------------------------------

# NOTE: In no case did adding the control for interest change the
# sign or the significance of any of our variables of interest.

screenreg(list(model.h2.eth.combo,
               model.h2.eth.notwhite,
               model.h2.eth.white))

texreg(list(model.h2.eth.combo,
            model.h2.eth.notwhite,
            model.h2.eth.white))

# =====================================================================
#                                   Model 1       Model 2       Model 3    
# ---------------------------------------------------------------------
# (Intercept)                      -0.04         -0.01        -0.02    
#                                   (0.13)        (0.16)       (0.31)   
# ethnicity_pct                     1.97 **       1.71 *       1.78    
#                                   (0.60)        (0.87)       (1.37)   
# interest                          0.65 ***      0.65 ***     0.64 ***
#                                   (0.09)        (0.11)       (0.15)   
# ---------------------------------------------------------------------
# AIC                            3998.78       2617.67      1404.26    
# BIC                            4023.05       2640.19      1424.39    
# Log Likelihood                -1995.39      -1304.83      -698.13    
# Num. obs.                      3190          2057         1133       
# Num. groups: eventregion        141           138          137       
# Var: eventregion (Intercept)      0.34          0.39         0.26    
# =====================================================================
#     *** p < 0.001; ** p < 0.01; * p < 0.05



screenreg(list(model.h2.gen.combo,
               model.h2.gen.notmale,
               model.h2.gen.male))

texreg(list(model.h2.gen.combo,
            model.h2.gen.notmale,
            model.h2.gen.male))

# =====================================================================
#                                  Model 1       Model 2      Model 3     
# ---------------------------------------------------------------------
# (Intercept)                       0.10         0.28          0.47    
#                                  (0.16)       (0.25)        (0.33)   
# gender_pct                        0.64        -1.04         -0.29    
#                                  (0.49)       (1.02)        (1.00)   
# interest                          0.65 ***     0.74 ***      0.58 ***
#                                  (0.09)       (0.14)        (0.12)   
# ---------------------------------------------------------------------
# AIC                            4007.71      1749.55       2280.65    
# BIC                            4031.99      1770.43       2302.69    
# Log Likelihood                -1999.86      -870.78      -1136.33    
# Num. obs.                      3190         1365          1825       
# Num. groups: eventregion        141          134           141       
# Var: eventregion (Intercept)      0.34         0.42          0.30    
# =====================================================================
#     *** p < 0.001; ** p < 0.01; * p < 0.05



screenreg(list(model.h1.gen,
               model.h3.gender))

texreg(list(model.h1.gen,
            model.h3.gender))

# ========================================================
#                                   Model 1       Model 2     
# --------------------------------------------------------
# (Intercept)                       0.35 ***      0.36    
#                                  (0.10)        (0.44)   
# notmale                          -0.18 *       -0.99 ** 
#                                  (0.08)        (0.37)   
# interest                          0.65 ***      0.69 ***
#                                  (0.09)        (0.09)   
# presented.new                                   0.14    
#                                                (0.39)   
# gender_pct                                     -0.45    
#                                                (0.69)   
# notmale:presented.new                           0.79 *  
#                                                (0.37)   
# --------------------------------------------------------
# AIC                            4004.09       3873.15    
# BIC                            4028.36       3915.44    
# Log Likelihood                -1998.04      -1929.58    
# Num. obs.                      3190          3107       
# Num. groups: eventregion        141           136       
# Var: eventregion (Intercept)      0.33          0.32    
# ========================================================
#     *** p < 0.001; ** p < 0.01; * p < 0.05

screenreg(list(model.h1.eth,
               model.h3.eth))

texreg(list(model.h1.eth,
               model.h3.eth))

# ========================================================
#     Model 1       Model 2     
# --------------------------------------------------------
#     (Intercept)                       0.39 ***      0.25    
# (0.11)        (0.53)   
# notwhite                         -0.19 *       -0.89    
# (0.08)        (0.47)   
# interest                          0.65 ***      0.69 ***
#     (0.09)        (0.09)   
# presented.new                                  -0.24    
# (0.53)   
# ethnicity_pct                                   1.96 ** 
#     (0.74)   
# notwhite:presented.new                          0.87    
# (0.49)   
# --------------------------------------------------------
#     AIC                            4004.26       3868.46    
# BIC                            4028.53       3910.75    
# Log Likelihood                -1998.13      -1927.23    
# Num. obs.                      3190          3107       
# Num. groups: eventregion        141           136       
# Var: eventregion (Intercept)      0.33          0.34    
# ========================================================
#     *** p < 0.001; ** p < 0.01; * p < 0.05


## predicted values for prototypical individuals
## https://stackoverflow.com/questions/39683369/change-order-of-one-set-of-factors-plotted-in-ggplot2

# determine the 10th and 90th percentiles for our actual ethnicity percent values
d_nounk <- d[d$ethnicity != "unknown",]
quants.eth_pcts <- quantile(d_nounk$ethnicity_pct, probs=c(0.1, 0.9))

# Use these as lower and upper limits of a sequence of length 200
eth_pct_range <- seq(quants.eth_pcts[1], quants.eth_pcts[2], length.out=200)

# Create a dataframe with 800 rows (for each given eth-pct value,
# there will be different combinations of ethnicity (white = 0, not white = 1) 
# and presented status (presented = 1, not presented = 0))

# We want to be able to plot 4 prediction lines -- using our model for H3.
# Use average attendee interest value (from actual data) for all hypothetical attendees.
# Assume they are all hypothetical attendees at a given value of x are 
# attending the same event (Same hypothetical Event ID and Region ID)
# referred to in this dataframe as eventregion "1-0".

# Plot 1: Proportion continued (y axis) vs Proportion Attendees of the Same Ethnicity
# Subplot 1a: Hypothetical Non-White Attendees, Subplot 1b: Hypothetical White Attendees
# Each subplot has 2 lines plotted -- one where attendees presented, 
# and one where attendees did not present. (See Figure 3 in Paper.)

proto.x.eth_pct <- data.frame(
    ethnicity_pct=rep(eth_pct_range, 4),
    notwhite=c(rep(0, 400), rep(1, 400)),
    Ethnicity=c(rep('White', 400), rep('Not White', 400)),
    presented.new=c(rep(0, 200), rep(1, 200),
                    rep(0, 200), rep(1, 200)),
    interest=mean(complete.cases(d$interest)),
    eventregion="1-0",
    individual=c(rep(0, 200),
                 rep(1, 200),
                 rep(2, 200),
                 rep(3, 200))
)

grid.tmp <- proto.x.eth_pct
grid.tmp$continued <- predict(model.h3.eth,
                              newdata=proto.x.eth_pct,
                              type="response")
test = grid.tmp$Ethnicity

grid.tmp$Ethnicity = factor(grid.tmp$Ethnicity, levels = c("Not White", "White"))

ggplot(data=grid.tmp) + aes(x=ethnicity_pct, y=continued, group=individual,
                            color=as.factor(presented.new)) +
    scale_color_discrete("Presented", breaks=c(0,1), labels=c("False", "True")) +
    scale_y_continuous("Proportion Continued") +
    scale_x_continuous("Proportion Attendees of Same Ethnicity") +
    facet_grid(.~Ethnicity) +
    geom_line() +
    geom_vline(xintercept = 0.2, linetype="dotted", color="black", size=1.0)
    
# Added vertical lines at 0.2 along the x-axes, to highlight the example of 
# predicted persistence with computing ("continued") if a participant attends 
# an event where 20% of the other attendees share the same ethnicity as that 
# participant.



## predicted values for prototypical individuals

# Follow a similar process using Gender, instead of Ethnicity.
# (See Figure 2 in Paper.)

quants.gen_pcts <- quantile(d_nounk$gender_pct, probs=c(0.1, 0.9))
gender_pct_range <- seq(quants.gen_pcts[1], quants.gen_pcts[2], length.out=200)

proto.x.gen_pct <- data.frame(
    gender_pct=rep(gender_pct_range, 4),
    notmale=c(rep(0, 400), rep(1, 400)),
    Gender=c(rep('Male', 400), rep('Not Male', 400)),
    presented.new=c(rep(0, 200), rep(1, 200),
                    rep(0, 200), rep(1, 200)),
    interest=mean(complete.cases(d$interest)),
    eventregion="1-0",
    individual=c(rep(0, 200),
                 rep(1, 200),
                 rep(2, 200),
                 rep(3, 200))
)

proto.x.gen_pct$continued <- predict(model.h3.gender,
                                    newdata=proto.x.gen_pct,
                                    type="response")

predict.glm(model.h3.gender,
            newdata=proto.x.gen_pct,
            type="response",
            se.fit=TRUE)

proto.x.gen_pct$Gender = factor(proto.x.gen_pct$Gender, levels = c("Not Male", "Male"))

ggplot(data=proto.x.gen_pct) + aes(x=gender_pct, y=continued, group=individual,
                                  color=as.factor(presented.new)) +
    scale_color_discrete("Presented", breaks=c(0,1), labels=c("False", "True")) +
    scale_y_continuous("Proportion Continued") +
    scale_x_continuous("Proportion Attendees of Same Gender") +
    facet_grid(.~Gender) +
    geom_line() +
    geom_vline(xintercept = 0.2, linetype="dotted", color="black", size=1.0)


# Added vertical lines at 0.2 along the x-axes, to highlight the example of 
# predicted persistence with computing ("continued") if a participant attends 
# an event where 20% of the other attendees share the same gender as that 
# participant.